1 Introduction

To interrogate the different maturation states required in these two differentiation pathways we performed a single-cell transcriptional and chromatin accessibility profiling of the PCs together with a representative subset of Germinal Center B Cells (GCBC) and MBCs. A total of 12,779 cells were analyzed at scRNA-seq level which clustered into 23 subpopulations, while 4,160 cells were grouped into 13 subpopulations at the scATAC-seq level to re-force cell transfer annotation and interpretation.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(Signac)
library(GenomicRanges)
library(pheatmap)
library(JASPAR2020)
library(TFBSTools)
library(dplyr)
library(ggplot2)
library(tidyverse)
library(data.table)
library(ggpubr)
library(writexl)
library(plyr)
library(stringr)
library(grid)
library(gridExtra)
library(EnhancedVolcano)
library(qlcMatrix)

set.seed(333)

2.2 Parameters

cell_type = "PC"

# Paths
path_to_obj <- paste0(
  here::here("scATAC-seq/results/R_objects/level_5/"),
  cell_type,
  "/05.",
  cell_type,
  "_chromVar_CISBP_level_5.rds",
  sep = ""
)

path_to_obj_RNA <- paste0(
  here::here("scRNA-seq/3-clustering/5-level_5/"),
  cell_type,
  "/PC_seu_obj_level_5_eta.rds")


color_palette <- c("#73787E", "#B8BCC1","#FECFC7" ,
                   "#FF8E7B","#a13b53","#A6E1F4", 
                   "#586BA4", "#323734","#035F72",
                   "#9CC6CF" ,"#198C19","#006600","#FFD8B1")


cels_order <- c("Dark Zone GCBC",
                "Light Zone GCBC",
                "PC committed Light Zone GCBC",
                "Early PC precursor",
                "PB",
                "IgG+ PC precursor",
                "IgM+ PC precursor",
                "IgD PC precursor", 
                "preMature IgG+ PC",
                "preMature IgM+ PC",
                "Mature PC",
                "MBC derived PC precursor",
                "class switch MBC")

colfunc <- colorRampPalette(c("#4575B4", "white","darkgreen"))

2.3 Functions

DARS <- function(ident.1,ident.2){
  DARs <- FindMarkers(
  ident.1 = ident.1,
  ident.2 = ident.2,
  object = seurat,
  min.pct = 0.05,
  test.use = 'LR',
  latent.vars = 'nCount_peaks_level_5')
  
return(DARs)}

DARS_ChromVar <- function(ident.1,ident.2){
  DARs <- FindMarkers(
  ident.1 = ident.1,
  ident.2 = ident.2,
  object = seurat,
  min.pct = 0.05,
  test.use = 'LR')
  
return(DARs)}

DARS_matrix <- function(DARs, group){
  avgexpr_mat <- AverageExpression(
  features = row.names(DARs),
  seurat,
  assays = "peaks_level_5",
  return.seurat = F,
  group.by = group,
  slot = "data")
return(avgexpr_mat)}


DARS_filtered_max <- function(mat, quant, cluster_interest){
  matrix_coefficient <- rowMax(mat) / rowSums(mat)
  hist(matrix_coefficient, breaks = 100)
  abline(v = quantile(matrix_coefficient, quant), 
         col="red", lwd=3, lty=2)
  
  regions <- which(matrix_coefficient > quantile(matrix_coefficient, 
                                                 quant))
  filtered_regions <- mat[regions,]
  filtered_regions_interest_clusters <- filtered_regions[which(apply(X = filtered_regions,
      MARGIN = 1,FUN = which.max) %in% cluster_interest),]

  return(filtered_regions_interest_clusters)}


pheatmap_plot <- function(matrices,n_cutree_rows,
                          annotation_col,mycolors_list){
  pheatmap(matrices,
      scale = "row",
      annotation_col = annotation_col,
      annotation_colors = mycolors_list,
      color = colfunc(100),
      angle_col = 45,
      show_rownames=F,
      border_color = "white",
      cluster_rows = T,
      cluster_cols = F,
      fontsize_col = 10,
      cutree_rows = n_cutree_rows,
      clustering_distance_cols = "euclidean", 
      clustering_method = "ward.D2")}

3 Load data

3.1 Load PC scRNA-seq data

seurat_RNA <- readRDS(path_to_obj_RNA)
Idents(seurat_RNA) <- "names_level_5_clusters_eta"
seurat_RNA
## An object of class Seurat 
## 37378 features across 12779 samples within 1 assay 
## Active assay: RNA (37378 features, 0 variable features)
##  3 dimensional reductions calculated: pca, harmony, umap

3.2 Load PC scATAC-seq data

seurat <- readRDS(path_to_obj)
seurat_peaks <- seurat@assays$peaks_level_5@ranges
seurat
## An object of class Seurat 
## 107847 features across 4160 samples within 2 assays 
## Active assay: peaks_level_5 (106083 features, 104344 variable features)
##  1 other assay present: chromvar
##  3 dimensional reductions calculated: umap, lsi, harmony
#pdf(file="/Users/pauli/Desktop/PAPER_FIGURES/ETA/2.scATAC/1.UMAP_scATAC.pdf",
 #   width=5,height=5,paper='special')

DimPlot(seurat,
  reduction = "umap",
  pt.size = 1,
  cols= color_palette,
  label = FALSE) + NoLegend() + theme(plot.title = element_blank())

4 Differential accessible analysis (DARs)

A pairwise differential accessibility analysis between the major biological identities revealed an extensive chromatin modulation along the primary reaction pathway.

DefaultAssay(seurat) <- "peaks_level_5"

step1 <- DARS(ident.1 = "Light Zone GCBC", 
              ident.2 = "PC committed Light Zone GCBC")

step2 <- DARS(ident.1 = "PC committed Light Zone GCBC", 
              ident.2 = "IgG+ PC precursor")

step3 <- DARS(ident.1 = "IgG+ PC precursor", 
              ident.2 = "Mature PC")

step4 <- DARS(ident.1 = "class switch MBC", 
              ident.2 = "Mature PC")

all_DARs <- rbind(step1,step2,step3, step4)

EnhancedVolcano(all_DARs, 
                lab = NA,
                pCutoff = 10e-03,
                FCcutoff = 0.5,
                labSize = 4,
                legendLabSize = 8,
                x = 'avg_log2FC', y = 'p_val_adj') 

The following statistical cutoffs where applied to filter the DARs: avg_log2FC >= 0,5 and p adjusted value <= 10e-03.

step1_filtered <- step1[which(abs(step1$avg_log2FC) >= 0.5 &
                                    step1$p_val_adj <= 10e-03),]

step2_filtered <- step2[which(abs(step2$avg_log2FC) >= 0.5 &
                                    step2$p_val_adj <= 10e-03),]

step3_filtered <- step3[which(abs(step3$avg_log2FC) >= 0.5 &
                                    step3$p_val_adj <= 10e-03),]

step4_filtered <- step4[which(abs(step4$avg_log2FC) >= 0.5 &
                                    step4$p_val_adj <= 10e-03),]

nrow(step1_filtered)
## [1] 228
nrow(step2_filtered)
## [1] 5774
nrow(step3_filtered)
## [1] 446
nrow(step4_filtered)
## [1] 2892
DARS <- rev(c(nrow(step1_filtered),nrow(step2_filtered),
          nrow(step3_filtered),nrow(step4_filtered)))

steps <- rev(c("step1", "step2", "step3", "step4"))

df <- data.frame(steps, round(DARS/sum(DARS) * 100,2))
colnames(df) <- c("steps","DARS")

#ggsave("/Users/pauli/Desktop/PAPER_FIGURES/ETA/2.scATAC/DARs_barplot.pdf")

ggbarplot(df, "steps", "DARS", 
          orientation = "horiz",
          label = TRUE) + scale_y_continuous(limits=c(0, 100))

5 Module-specific DARs

To evaluate the general landscape of the chromatin remodeling during PC differentiation, we combined the obtained DARs into a unique heatmap. A hierarchical clustering based on the Euclidean distance measure and ward.D2 method revealed three main modules of chromatin dynamics.

DefaultAssay(seurat) <- "peaks_level_5"
step1_mat <- DARS_matrix(DARs = step1_filtered,
                         group = "annotation_level_5")
step2_mat <- DARS_matrix(DARs = step2_filtered,
                        group = "annotation_level_5")
step3_mat <- DARS_matrix(DARs = step3_filtered,
                         group = "annotation_level_5")
step4_mat <- DARS_matrix(DARs = step4_filtered,
                         group = "annotation_level_5")

path_matrix <- rbind(step1_mat$peaks_level_5,
                      step2_mat$peaks_level_5,
                      step3_mat$peaks_level_5,
                      step4_mat$peaks_level_5)

cell_types <- names(table(seurat$annotation_level_5))
annotation_col = data.frame(
                    cell_type = cell_types) 
rownames(annotation_col) <- cell_types

mycolors <- color_palette
names(mycolors) <- cell_types
mycolors_list <- list(mycolors)
names(mycolors_list) <- "cell_type"

#pdf(file="/Users/pauli/Desktop/PAPER_FIGURES/ETA/2.scATAC/3.Heatmap_DARs.pdf",
 #   width = 8,height = 6)
entire_heatmap <- pheatmap_plot(path_matrix[,cels_order],n_cutree_rows = 3,
                                annotation_col, 
                                mycolors_list)
entire_heatmap

cutree_out = cutree(entire_heatmap$tree_row, k=3)
cutree_df <- as.data.frame(cbind(names(cutree_out),cutree_out))
cluster1 <- cutree_df[cutree_df$cutree_out == 1,]
cluster2 <- cutree_df[cutree_df$cutree_out == 2,]
cluster3 <- cutree_df[cutree_df$cutree_out == 3,]

As module-specific DARs likely contain specific DNA motifs that allow binding of specific TF, we performed an enrichment analysis and identified several key binding motifs of expressed TF related to the cell identity.

5.1 Cluster 1

5.1.1 Selection of top overrepresented expressed TF.

Motif_enr_cluster1 <- FindMotifs(object = seurat,
                             features = cluster1$V1)
Motif_enr_cluster1$cluster <- 1

avgexpr_mat <- AverageExpression(
  features = Motif_enr_cluster1$motif.name,
  seurat_RNA,
  return.seurat = F,
  group.by = "annotation_level_1",
  slot = "data")

# Removing motifs with non-expression in any main population
avgexpr_df_filt <- filter(as.data.frame(avgexpr_mat$RNA),
                           GCBC > 0, NBC_MBC > 0, PC > 0)

DT::datatable(avgexpr_df_filt)
DT::datatable(Motif_enr_cluster1)
FeaturePlot(
  object = seurat,
  features = c("ENSG00000164330-LINE255-EBF1-D-N1",
               "ENSG00000196092-LINE3170-PAX5-D-N8"),
  min.cutoff = 'q1',
  max.cutoff = 'q99',
  pt.size = 0.1,
  ncol = 2
)

5.1.2 Subclustering module 1: PC_committed-module

clustering1_filtered <- DARS_filtered_max(path_matrix[cluster1$V1,], 0.6, 3)

#pdf(file="/Users/pauli/Desktop/PAPER_FIGURES/ETA/2.scATAC/4.Heatmap_PC_committed_DARs.pdf")
pheatmap_plot(clustering1_filtered[,cels_order],n_cutree_rows = 1, annotation_col, mycolors_list)

seurat <- AddModuleScore(
    seurat,
    name = 'PC-committed-module',
    features = list(row.names(clustering1_filtered)))


colfunc <- colorRampPalette(c("#4575B4", "white","darkgreen"))

#pdf(file="/Users/pauli/Desktop/PAPER_FIGURES/ETA/2.scATAC/5.Module_PC-committed_DARs.pdf",
 #   width=5,height=5,paper='special')

FeaturePlot(seurat,
      order = TRUE,
      min.cutoff = 'q1',
      max.cutoff = 'q99',
      pt.size = 0.8,
      features  = "PC.committed.module1") + 
  theme(plot.title = element_blank()) + 
  scale_color_gradientn( colours = c('lightgrey', 'darkgreen'))

5.1.2.1 Enrichment motif analysis of PC_committed-module

clustering1_filtered_motifs <- FindMotifs(object = seurat,
                             features = row.names(clustering1_filtered))

DT::datatable(clustering1_filtered_motifs)
#pdf(file="/Users/pauli/Desktop/PAPER_FIGURES/ETA/2.scATAC/6.Volcano_plot_POU.pdf",
 #   width=5,height=8,paper='special')

EnhancedVolcano(clustering1_filtered_motifs, 
                pCutoff = 10e-03,
                FCcutoff = 0.5,
                labSize = 4,
                legendLabSize = 8,
                lab = clustering1_filtered_motifs$motif.name,
                x = 'fold.enrichment', y = 'pvalue') 

#pdf(file="/Users/pauli/Desktop/PAPER_FIGURES/ETA/2.scATAC/7.UMAP_POU.pdf",
 #   width=5,height=5,paper='special')

FeaturePlot(seurat,
      order = TRUE,
      features = "ENSG00000137709-LINE2649-POU2F3-D-N1",
      min.cutoff = 'q1',
      max.cutoff = 'q99',
      pt.size = 0.8) + 
  theme(plot.title = element_blank()) + scale_color_gradientn( colours = c('lightgrey', 'chartreuse3'))

5.2 Cluster 2

Motif_enr_cluster2 <- FindMotifs(object = seurat,
                             features = cluster2$V1)

avgexpr_mat <- AverageExpression(
  features = Motif_enr_cluster2$motif.name,
  seurat_RNA,
  return.seurat = F,
  group.by = "annotation_level_1",
  slot = "data")

# Removing motifs with non-expression in any main population
avgexpr_df_filt <- filter(as.data.frame(avgexpr_mat$RNA),
                           GCBC > 0, NBC_MBC > 0, PC > 0)

DT::datatable(avgexpr_df_filt)
DT::datatable(Motif_enr_cluster2)
FeaturePlot(
  object = seurat,
  features = c("ENSG00000137265-LINE2748-IRF4-D-N1",
               "ENSG00000140968-LINE2752-IRF8-D-N2"),
  min.cutoff = 'q1',
  max.cutoff = 'q99',
  pt.size = 0.1,
  ncol = 2
)

5.3 Cluster 3

Motif_enr_cluster3 <- FindMotifs(object = seurat,
                             features = cluster3$V1)

avgexpr_mat <- AverageExpression(
  features = Motif_enr_cluster3$motif.name,
  seurat_RNA,
  return.seurat = F,
  group.by = "annotation_level_1",
  slot = "data")

# Removing motifs with non-expression in any main population
avgexpr_df_filt <- filter(as.data.frame(avgexpr_mat$RNA),
                           GCBC > 0, NBC_MBC > 0, PC > 0)

DT::datatable(avgexpr_df_filt)
DT::datatable(Motif_enr_cluster3)
FeaturePlot(
  object = seurat,
  features = c("ENSG00000142539-LINE1880-SPIB-D-N2",
               "ENSG00000175832-LINE1927-ETV4-D-N1"),
  min.cutoff = 'q1',
  max.cutoff = 'q99',
  pt.size = 0.1,
  ncol = 2
)

6 Differential motif activity analysis across the main states of PC maturation

DefaultAssay(seurat) <- "chromvar"

step1_chromVar <- DARS_ChromVar(ident.1 = "Light Zone GCBC", 
                                ident.2 = "PC committed Light Zone GCBC")

step2_chromVar <- DARS_ChromVar(ident.1 = "PC committed Light Zone GCBC", 
                                ident.2 = "IgG+ PC precursor")

step3_chromVar <- DARS_ChromVar(ident.1 = "IgG+ PC precursor", 
                                ident.2 = "Mature PC")

step4_chromVar <- DARS_ChromVar(ident.1 = "class switch MBC", 
                                ident.2 = "Mature PC")


step1_chromVar_filtered <- step1_chromVar[which(abs(step1_chromVar$avg_log2FC) >= 0.5 &
                            step1_chromVar$p_val_adj <= 10e-05),]

step2_chromVar_filtered <- step2_chromVar[which(abs(step2_chromVar$avg_log2FC) >= 0.5 &
                            step2_chromVar$p_val_adj <= 10e-05),]

step3_chromVar_filtered <- step2_chromVar[which(abs(step3_chromVar$avg_log2FC) >= 0.5 &
                            step3_chromVar$p_val_adj <= 10e-05),]

step4_chromVar_filtered <- step4_chromVar[which(abs(step4_chromVar$avg_log2FC) >= 0.5 &
                            step4_chromVar$p_val_adj <= 10e-05),]
DT::datatable(step1_chromVar_filtered)
DT::datatable(step2_chromVar_filtered)
DT::datatable(step3_chromVar_filtered)
DT::datatable(step4_chromVar_filtered)

7 Session Info

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Motif_TF/lib/libopenblasp-r0.3.10.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] qlcMatrix_0.9.7        sparsesvd_0.2          slam_0.1-47            Matrix_1.3-4           EnhancedVolcano_1.13.2 ggrepel_0.9.1          gridExtra_2.3          plyr_1.8.6             writexl_1.4.0          ggpubr_0.4.0           data.table_1.14.0      forcats_0.5.0          stringr_1.4.0          purrr_0.3.4            readr_1.4.0            tidyr_1.1.3            tibble_3.1.2           tidyverse_1.3.0        ggplot2_3.3.5          dplyr_1.0.7            TFBSTools_1.26.0       JASPAR2020_0.99.10     pheatmap_1.0.12        GenomicRanges_1.40.0   GenomeInfoDb_1.24.2    IRanges_2.22.1         S4Vectors_0.26.0       BiocGenerics_0.34.0    Signac_1.2.1           SeuratObject_4.0.2     Seurat_4.0.3           BiocStyle_2.16.1      
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.1                  reticulate_1.20             R.utils_2.10.1              tidyselect_1.1.1            poweRlaw_0.70.6             RSQLite_2.2.1               AnnotationDbi_1.50.3        htmlwidgets_1.5.3           docopt_0.7.1                BiocParallel_1.22.0         Rtsne_0.15                  munsell_0.5.0               codetools_0.2-17            ica_1.0-2                   DT_0.16                     future_1.21.0               miniUI_0.1.1.1              withr_2.4.2                 colorspace_2.0-2            Biobase_2.48.0              knitr_1.30                  rstudioapi_0.11             ROCR_1.0-11                 ggsignif_0.6.0              tensor_1.5                  listenv_0.8.0               labeling_0.4.2              GenomeInfoDbData_1.2.3      polyclip_1.10-0             bit64_4.0.5                 farver_2.1.0                rprojroot_2.0.2             parallelly_1.26.1           vctrs_0.3.8                 generics_0.1.0              xfun_0.18                   lsa_0.73.2                  ggseqlogo_0.1               R6_2.5.0                    bitops_1.0-7                spatstat.utils_2.2-0        DelayedArray_0.14.0        
##  [43] assertthat_0.2.1            promises_1.2.0.1            scales_1.1.1                gtable_0.3.0                globals_0.14.0              goftest_1.2-2               seqLogo_1.54.3              rlang_0.4.11                RcppRoll_0.3.0              splines_4.0.3               rstatix_0.6.0               rtracklayer_1.48.0          lazyeval_0.2.2              broom_0.7.2                 spatstat.geom_2.2-0         modelr_0.1.8                BiocManager_1.30.10         yaml_2.2.1                  reshape2_1.4.4              abind_1.4-5                 crosstalk_1.1.1             backports_1.1.10            httpuv_1.6.1                tools_4.0.3                 bookdown_0.21               ellipsis_0.3.2              spatstat.core_2.2-0         RColorBrewer_1.1-2          ggridges_0.5.3              Rcpp_1.0.6                  zlibbioc_1.34.0             RCurl_1.98-1.2              rpart_4.1-15                deldir_0.2-10               pbapply_1.4-3               cowplot_1.1.1               zoo_1.8-9                   haven_2.3.1                 SummarizedExperiment_1.18.1 cluster_2.1.0               here_1.0.1                  fs_1.5.0                   
##  [85] magrittr_2.0.1              scattermore_0.7             openxlsx_4.2.4              reprex_0.3.0                lmtest_0.9-38               RANN_2.6.1                  SnowballC_0.7.0             fitdistrplus_1.1-5          matrixStats_0.59.0          hms_0.5.3                   patchwork_1.1.1             mime_0.11                   evaluate_0.14               xtable_1.8-4                XML_3.99-0.3                rio_0.5.16                  readxl_1.3.1                compiler_4.0.3              KernSmooth_2.23-17          crayon_1.4.1                R.oo_1.24.0                 htmltools_0.5.1.1           mgcv_1.8-33                 later_1.2.0                 lubridate_1.7.9             DBI_1.1.0                   tweenr_1.0.1                dbplyr_1.4.4                MASS_7.3-53                 car_3.0-10                  cli_3.0.0                   R.methodsS3_1.8.1           igraph_1.2.6                pkgconfig_2.0.3             GenomicAlignments_1.24.0    TFMPvalue_0.0.8             foreign_0.8-80              plotly_4.9.4.1              spatstat.sparse_2.0-0       xml2_1.3.2                  annotate_1.66.0             DirichletMultinomial_1.30.0
## [127] XVector_0.28.0              rvest_0.3.6                 digest_0.6.27               sctransform_0.3.2           RcppAnnoy_0.0.18            pracma_2.2.9                CNEr_1.24.0                 spatstat.data_2.1-0         Biostrings_2.56.0           cellranger_1.1.0            rmarkdown_2.5               leiden_0.3.8                fastmatch_1.1-0             uwot_0.1.10                 curl_4.3.2                  shiny_1.6.0                 Rsamtools_2.4.0             gtools_3.9.2                lifecycle_1.0.0             nlme_3.1-150                jsonlite_1.7.2              carData_3.0-4               viridisLite_0.4.0           BSgenome_1.56.0             fansi_0.5.0                 pillar_1.6.1                lattice_0.20-41             KEGGREST_1.28.0             fastmap_1.1.0               httr_1.4.2                  survival_3.2-7              GO.db_3.11.4                glue_1.4.2                  zip_2.1.1                   png_0.1-7                   bit_4.0.4                   ggforce_0.3.2               stringi_1.6.2               blob_1.2.1                  caTools_1.18.2              memoise_1.1.0               irlba_2.3.3                
## [169] future.apply_1.7.0